{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "41c10b28",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "# Process-wide settings, applied before the ML libraries are imported in the next cell.\n",
    "os.environ.update({\n",
    "    'CUDA_VISIBLE_DEVICES': '',  # empty value: no CUDA device is exposed to this process\n",
    "    'TF_FORCE_GPU_ALLOW_GROWTH': 'true',\n",
    "})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "87c274b5",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import numpy as np\n",
    "import transformers\n",
    "from transformers import RobertaTokenizer, AutoModelForMaskedLM, RobertaConfig, RobertaForMaskedLM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "cf946f91",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The tokenizer is loaded from a path relative to the notebook's working directory.\n",
    "TOKENIZER_DIR = './malay-cased-roberta-base'\n",
    "tokenizer = RobertaTokenizer.from_pretrained(TOKENIZER_DIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a917c696",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The masked-LM model is restored from the `checkpoint-840000` directory of the training run.\n",
    "CHECKPOINT_DIR = './malay-cased-roberta-base-mlm/checkpoint-840000'\n",
    "model = RobertaForMaskedLM.from_pretrained(CHECKPOINT_DIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "c58b40eb",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/husein/.local/lib/python3.8/site-packages/huggingface_hub/utils/_deprecation.py:39: FutureWarning: Pass token='roberta-base-cased' as keyword args. From version 0.7 passing these as positional arguments will result in an error\n",
      "  warnings.warn(\n",
      "/home/husein/.local/lib/python3.8/site-packages/huggingface_hub/hf_api.py:79: FutureWarning: `name` and `organization` input arguments are deprecated and will be removed in v0.7. Pass `repo_id` instead.\n",
      "  warnings.warn(\n",
      "/home/husein/.local/lib/python3.8/site-packages/huggingface_hub/hf_api.py:596: FutureWarning: `create_repo` now takes `token` as an optional positional argument. Be sure to adapt your code!\n",
      "  warnings.warn(\n",
      "Cloning https://huggingface.co/mesolitica/roberta-base-cased into local empty directory.\n"
     ]
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "45242f748d644dc38d77137341af3ddd",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Upload file pytorch_model.bin:   0%|          | 32.0k/422M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "remote: Scanning LFS files for validity, may be slow...        \n",
      "remote: LFS file scan complete.        \n",
      "To https://huggingface.co/mesolitica/roberta-base-cased\n",
      "   9c1089f..6303ac6  main -> main\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'https://huggingface.co/mesolitica/roberta-base-cased/commit/6303ac6c6d396e0be297c8a9c7dd02c8e38a0745'"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Publish the loaded checkpoint to the Hub under the organization's namespace.\n",
    "# The call stays last so the returned commit URL is shown as the cell result.\n",
    "HUB_REPO_NAME = 'roberta-base-bahasa-cased'\n",
    "HUB_ORGANIZATION = 'mesolitica'\n",
    "model.push_to_hub(HUB_REPO_NAME, organization=HUB_ORGANIZATION)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "59b6ed3a",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "To https://huggingface.co/mesolitica/roberta-base-cased\n",
      "   95bce61..e989aba  main -> main\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'https://huggingface.co/mesolitica/roberta-base-cased/commit/e989aba21aa642df3dd9f323897c5908c44975df'"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Publish the tokenizer files to the same Hub repository as the model weights.\n",
    "HUB_REPO_NAME = 'roberta-base-bahasa-cased'\n",
    "HUB_ORGANIZATION = 'mesolitica'\n",
    "tokenizer.push_to_hub(HUB_REPO_NAME, organization=HUB_ORGANIZATION)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "f97e7f65",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Already up to date.\r\n"
     ]
    }
   ],
   "source": [
    "# Bring the local clone up to date before the next cell commits into it.\n",
    "# The directory must be the same one the next cell copies into and pushes from.\n",
    "!cd roberta-base-bahasa-cased && git pull"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "85c89118",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[main 8ac15dd] add tensorboard\n",
      " 14 files changed, 42 insertions(+)\n",
      " create mode 100644 runs/Sep04_15-16-35_husein-MS-7D31/1662276236.3800876/events.out.tfevents.1662276236.husein-MS-7D31.155553.1\n",
      " create mode 100644 runs/Sep04_15-16-35_husein-MS-7D31/events.out.tfevents.1662276236.husein-MS-7D31.155553.0\n",
      " create mode 100644 runs/Sep04_18-32-46_husein-MS-7D31/1662287576.0371726/events.out.tfevents.1662287576.husein-MS-7D31.188940.1\n",
      " create mode 100644 runs/Sep04_18-32-46_husein-MS-7D31/events.out.tfevents.1662287576.husein-MS-7D31.188940.0\n",
      " create mode 100644 runs/Sep04_18-33-31_husein-MS-7D31/1662287620.2240365/events.out.tfevents.1662287620.husein-MS-7D31.189245.1\n",
      " create mode 100644 runs/Sep04_18-33-31_husein-MS-7D31/events.out.tfevents.1662287620.husein-MS-7D31.189245.0\n",
      " create mode 100644 runs/Sep04_19-31-24_husein-MS-7D31/1662291099.796556/events.out.tfevents.1662291099.husein-MS-7D31.3886.1\n",
      " create mode 100644 runs/Sep04_19-31-24_husein-MS-7D31/events.out.tfevents.1662291099.husein-MS-7D31.3886.0\n",
      " create mode 100644 runs/Sep04_19-59-42_husein-MS-7D31/1662292790.132384/events.out.tfevents.1662292790.husein-MS-7D31.14826.1\n",
      " create mode 100644 runs/Sep04_19-59-42_husein-MS-7D31/events.out.tfevents.1662292790.husein-MS-7D31.14826.0\n",
      " create mode 100644 runs/Sep04_20-02-08_husein-MS-7D31/1662292935.7282245/events.out.tfevents.1662292935.husein-MS-7D31.15651.1\n",
      " create mode 100644 runs/Sep04_20-02-08_husein-MS-7D31/events.out.tfevents.1662292935.husein-MS-7D31.15651.0\n",
      " create mode 100644 runs/Sep04_20-02-58_husein-MS-7D31/1662292985.5532491/events.out.tfevents.1662292985.husein-MS-7D31.15936.1\n",
      " create mode 100644 runs/Sep04_20-02-58_husein-MS-7D31/events.out.tfevents.1662292985.husein-MS-7D31.15936.0\n",
      "Uploading LFS objects: 100% (14/14), 332 KB | 72 KB/s, done.                    \n",
      "Enumerating objects: 32, done.\n",
      "Counting objects: 100% (32/32), done.\n",
      "Delta compression using up to 20 threads\n",
      "Compressing objects: 100% (31/31), done.\n",
      "Writing objects: 100% (31/31), 3.55 KiB | 3.55 MiB/s, done.\n",
      "Total 31 (delta 1), reused 0 (delta 0)\n",
      "remote: Scanning LFS files for validity, may be slow...\u001b[K\n",
      "remote: LFS file scan complete.\u001b[K\n",
      "To https://huggingface.co/mesolitica/roberta-base-cased\n",
      "   b95167d..8ac15dd  main -> main\n"
     ]
    }
   ],
   "source": [
    "# Copy the training run's TensorBoard event files into the repo clone, then publish them.\n",
    "# `git -C <dir>` runs each git command as if started inside <dir>; the && chain still stops at the first failure.\n",
    "!cp -r malay-cased-roberta-base-mlm/runs roberta-base-bahasa-cased\n",
    "!git -C roberta-base-bahasa-cased add . && git -C roberta-base-bahasa-cased commit -m 'add tensorboard' && git -C roberta-base-bahasa-cased push"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "e83f4238",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "e924bcdd203d406bb312db6533f82675",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading config.json:   0%|          | 0.00/708 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a6dd7a95a2f14f8e8da43b9866256a32",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading pytorch_model.bin:   0%|          | 0.00/422M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "49df2aed33fd42e6b0c487724b061c6b",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading tokenizer_config.json:   0%|          | 0.00/1.32k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "7e059a8d9d68422e8e69d24f19cebefe",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading vocab.json:   0%|          | 0.00/600k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "4530d54989db47caa0bce802f89a1d4e",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading merges.txt:   0%|          | 0.00/266k [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "a99729fb450a4aadb593c3a55b534d39",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading special_tokens_map.json:   0%|          | 0.00/957 [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from transformers import AutoTokenizer, AutoModelForMaskedLM, pipeline\n",
    "\n",
    "# Round-trip check: load the published model and tokenizer back from the Hub.\n",
    "# This rebinds `model` and `tokenizer`, replacing the objects loaded from local paths earlier.\n",
    "HUB_REPO_ID = 'mesolitica/roberta-base-bahasa-cased'\n",
    "\n",
    "model = AutoModelForMaskedLM.from_pretrained(HUB_REPO_ID)\n",
    "tokenizer = AutoTokenizer.from_pretrained(HUB_REPO_ID, do_lower_case=False)\n",
    "fill_mask = pipeline('fill-mask', model=model, tokenizer=tokenizer)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "39a6c6c0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'score': 0.3368818759918213,\n",
       "  'token': 746,\n",
       "  'token_str': ' negara',\n",
       "  'sequence': 'Permohonan Najib, anak untuk dengar isu perlembagaan negara.'},\n",
       " {'score': 0.09646568447351456,\n",
       "  'token': 598,\n",
       "  'token_str': ' Malaysia',\n",
       "  'sequence': 'Permohonan Najib, anak untuk dengar isu perlembagaan Malaysia.'},\n",
       " {'score': 0.029483484104275703,\n",
       "  'token': 3265,\n",
       "  'token_str': ' UMNO',\n",
       "  'sequence': 'Permohonan Najib, anak untuk dengar isu perlembagaan UMNO.'},\n",
       " {'score': 0.026470622047781944,\n",
       "  'token': 2562,\n",
       "  'token_str': ' parti',\n",
       "  'sequence': 'Permohonan Najib, anak untuk dengar isu perlembagaan parti.'},\n",
       " {'score': 0.023237623274326324,\n",
       "  'token': 391,\n",
       "  'token_str': ' ini',\n",
       "  'sequence': 'Permohonan Najib, anak untuk dengar isu perlembagaan ini.'}]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Smoke test: ask the pipeline for candidates for the single <mask> position.\n",
    "# The call is the last expression so its scored candidates render as the cell output.\n",
    "masked_sentence = 'Permohonan Najib, anak untuk dengar isu perlembagaan <mask> .'\n",
    "fill_mask(masked_sentence)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9a1b3174",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
